/****************************************************************************
*  Copyright 2025 Gorgon Meducer (Email:embedded_zhuoran@hotmail.com)       *
*                                                                           *
*  Licensed under the Apache License, Version 2.0 (the "License");          *
*  you may not use this file except in compliance with the License.         *
*  You may obtain a copy of the License at                                  *
*                                                                           *
*     http://www.apache.org/licenses/LICENSE-2.0                            *
*                                                                           *
*  Unless required by applicable law or agreed to in writing, software      *
*  distributed under the License is distributed on an "AS IS" BASIS,        *
*  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
*  See the License for the specific language governing permissions and      *
*  limitations under the License.                                           *
*                                                                           *
****************************************************************************/

/*============================ INCLUDES ======================================*/
#undef __PERF_COUNT_PLATFORM_SPECIFIC_HEADER__

#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include "cmsis_compiler.h"

#define __IMPLEMENT_PERF_COUNTER
#include "perf_counter.h"

#if defined(__IS_COMPILER_GCC__)
#   pragma GCC diagnostic ignored "-Wattributes"
#endif

#if defined(__clang__)
#   pragma clang diagnostic ignored "-Wunknown-warning-option"
#   pragma clang diagnostic ignored "-Wreserved-identifier"
#   pragma clang diagnostic ignored "-Wconditional-uninitialized"
#   pragma clang diagnostic ignored "-Wcast-align"
#   pragma clang diagnostic ignored "-Wmissing-prototypes"
#   pragma clang diagnostic ignored "-Wunused-function"
#endif


/*============================ MACROS ========================================*/
/* NOTE(review): PERF_CNT_COMPENSATION_THRESHOLD is not referenced by any code
 * visible in this file; presumably it is consumed elsewhere - confirm before
 * changing or removing it.
 */
#ifndef PERF_CNT_COMPENSATION_THRESHOLD
#   define PERF_CNT_COMPENSATION_THRESHOLD          16
#endif

/* Lower bound, in timer ticks, of the overhead compensation that
 * perfc_delay_us() and perfc_delay_ms() subtract from the requested delay.
 * The larger of this value and g_nOffset is used.
 */
#ifndef PERF_CNT_DELAY_US_COMPENSATION
#   define PERF_CNT_DELAY_US_COMPENSATION           90
#endif

/* Values stored in struct __task_cycle_info_t::wMagicWord:
 *  - MAGIC_WORD_CANARY is written by init_task_cycle_counter()
 *  - MAGIC_WORD_AGENT_LIST_VALID is written by register_task_cycle_agent()
 *    and enables the agent-list walk in the context-switch hooks.
 */
#define MAGIC_WORD_AGENT_LIST_VALID        0x8492A53C
#define MAGIC_WORD_CANARY                  0xDEADBEEF

/*============================ MACROFIED FUNCTIONS ===========================*/

/* Q16 fixed-point helpers (16 fractional bits). The shift is performed in the
 * type of the argument, so pass a 64-bit value when the result may exceed
 * 32 bits.
 */
#define INT_TO_Q16(__INT)       ((__INT) << 16)
#define Q16_TO_INT(__Q16)       ((__Q16) >> 16)

/*============================ TYPES =========================================*/

/* Per-task root record. The context-switch hooks cast the pointer they are
 * given straight to this structure, and get_rtos_task_cycle_info() results
 * are cast to it as well, so the member order must not change.
 */
struct __task_cycle_info_t {
    task_cycle_info_t       tInfo;             //!< cycle information
    int64_t                 lLastTimeStamp;    //!< previous timestamp
    task_cycle_info_agent_t tList;             //!< the root of the agent list
    uint32_t                wMagicWord;        //!< a magic word for validation
} ;

/* signed 32-bit Q16 fixed-point value (see INT_TO_Q16 / Q16_TO_INT) */
typedef int32_t q16_t;


/*============================ GLOBAL VARIABLES ==============================*/
/*============================ LOCAL VARIABLES ===============================*/

/* File-local state of the performance counter service.
 * It is volatile because it is updated from the timer overflow handling path
 * and read from get_system_ticks()/get_system_ms()/get_system_us().
 * NOTE: "Residule" (sic) is the spelling used by the identifiers below.
 */
volatile static struct {

    /* raw ticks */
    struct {
        int64_t lTimestampBase;     /* ticks accumulated from completed timer periods */
        int64_t lOldTimestamp;      /* last value returned by get_system_ticks() */
    } Ticks;

    /* microsecond */
    struct {
        /* conversion unit written by update_perf_counter():
         *  - below 1MHz: microseconds per tick, Q16 (q16Unit)
         *  - otherwise : ticks per microsecond, Q16 (q16Unit), or a plain
         *                integer (wUnit) when __PERFC_NO_Q16__ is defined
         */
        union {
            uint32_t    wUnit;
            q16_t       q16Unit;
        };
    #if defined(__PERFC_USE_DEDICATED_MS_AND_US__)
        uint32_t    wResidule;          /* ticks not yet converted into whole us */
        struct {
            uint32_t    lCompensation;  /* whole us contained in one timer period */
            uint32_t    wResidule;      /* ticks left over per timer period */
        } Overflow;

        int64_t     lTimestampBase;     /* us accumulated from completed periods */
        int64_t     lOldTimestamp;      /* last value returned by get_system_us() */
    #endif
    } US;

    /* millisecond */
    struct {
        uint32_t    wUnit;              /* ticks per millisecond (frequency / 1000) */
    #if defined(__PERFC_USE_DEDICATED_MS_AND_US__)
        uint32_t    wResidule;          /* ticks not yet converted into whole ms */
        struct {
            uint32_t    lCompensation;  /* whole ms contained in one timer period */
            uint32_t    wResidule;      /* ticks left over per timer period */
        } Overflow;
        int64_t     lTimestampBase;     /* ms accumulated from completed periods */
        int64_t     lOldTimestamp;      /* last value returned by get_system_ms() */
    #endif
    } MS;
    
    /* Misc */
    bool    bIsSysTimerOccupied;        /* argument given to perfc_init() */
    bool    bLessThan1MHz;              /* timer frequency is below 1,000,000 Hz */
}PERFC = {0};

//volatile static int64_t s_lSystemClockCounts = 0;

/* Measurement overhead in cycles: update_perf_counter() sets it from
 * __cycle_count__ measured around a single NOP. It is subtracted from
 * measured task cycles and used as delay compensation.
 */
volatile int32_t g_nOffset = 0;
/* NOTE(review): not referenced by any code visible in this file; presumably
 * used by macros in perf_counter.h - confirm.
 */
volatile int64_t g_lLastTimeStamp = 0;

/*============================ PROTOTYPES ====================================*/

/* low level interface for porting
 *
 * These functions are implemented by the platform port; the notes below only
 * describe how this file uses them.
 */

/* timer input frequency; treated as Hz by update_perf_counter() */
extern
uint32_t perfc_port_get_system_timer_freq(void);

/* top value of the timer; callers add 1 to get the ticks of one full period */
extern
int64_t perfc_port_get_system_timer_top(void);

/* true when a timer overflow has happened and has not been handled yet */
extern
bool perfc_port_is_system_timer_ovf_pending(void);

/* called by perfc_init(); its result becomes the result of perfc_init() */
extern
bool perfc_port_init_system_timer(bool bTimerOccupied);

/* ticks elapsed within the current timer period (added to the tick base) */
extern
int64_t perfc_port_get_system_timer_elapsed(void);

extern
void perfc_port_clear_system_timer_ovf_pending(void);

/* used by before_cycle_counter_reconfiguration() before the final readout */
extern
void perfc_port_stop_system_timer_counting(void);

/* used by before_cycle_counter_reconfiguration() after the final readout */
extern
void perfc_port_clear_system_timer_counter(void);

/*============================ IMPLEMENTATION ================================*/


/*!
 * \brief Convert a float into Q16 fixed point, rounding half away from zero.
 * \param[in] fIn0 the value to convert
 * \return fIn0 scaled by 65536 and rounded to the nearest integer
 */
__STATIC_INLINE 
q16_t
reinterpret_q16_f32(float fIn0)
{
    /* bias by half an LSB towards the sign of the input before truncating */
    const float fRounding = (fIn0 >= 0) ? 0.5f : -0.5f;

    return (q16_t)(fIn0 * 65536.0f + fRounding);
}

/*!
 * \brief Convert a Q16 fixed-point value into a float.
 * \param[in] q16In0 the Q16 value
 * \return q16In0 divided by 65536
 */
__STATIC_INLINE 
float
reinterpret_f32_q16(q16_t q16In0)
{
    const float fQ16One = 65536.0f;

    return (float)q16In0 / fQ16One;
}


/*
 * Account for one completed system timer period.
 *
 * The tick base is advanced by (timer top + 1). When
 * __PERFC_USE_DEDICATED_MS_AND_US__ is defined, the millisecond and
 * microsecond bases are advanced by the per-period compensation that
 * update_perf_counter() precomputed, and the ticks that do not make up a whole
 * ms/us are accumulated in the corresponding wResidule.
 *
 * IMPORTANT: When you want to use perf_counter APIs in ISRs having higher 
 *            priorities than __PERFC_SYSTIMER_PRIORITY__, please make sure
 *            the System Timer ISR (i.e. SysTick_Handler) has the same or 
 *            higher priority.
 */
void perfc_port_insert_to_system_timer_insert_ovf_handler(void)
{
    int64_t lLoad = perfc_port_get_system_timer_top() + 1;

    /* prevent high priority exceptions from preempting the system timer OVF 
     * exception handling
     */
    __PERFC_SAFE {
        PERFC.Ticks.lTimestampBase += lLoad;
    }

#if defined(__PERFC_USE_DEDICATED_MS_AND_US__)
    // update system ms counter
    __PERFC_SAFE {
        PERFC.MS.lTimestampBase += PERFC.MS.Overflow.lCompensation;
        
        uint32_t wResidule = PERFC.MS.wResidule + PERFC.MS.Overflow.wResidule;

        /* once adding another per-period residue could exceed UINT32_MAX,
         * fold the accumulated residue into whole milliseconds and keep only
         * the remaining ticks
         */
        if (__UINT32_MAX__ - wResidule <= PERFC.MS.Overflow.wResidule) {
            int64_t lMSCompen = perfc_convert_ticks_to_ms(wResidule);
            PERFC.MS.wResidule = (int64_t)wResidule - perfc_convert_ms_to_ticks(lMSCompen);
            
            PERFC.MS.lTimestampBase += lMSCompen;
        } else {
            PERFC.MS.wResidule = wResidule;
        }
    }

    // update system us counter
    __PERFC_SAFE {
        PERFC.US.lTimestampBase += PERFC.US.Overflow.lCompensation;
        
        uint32_t wResidule = PERFC.US.wResidule + PERFC.US.Overflow.wResidule;

        /* same folding rule as for the millisecond counter above */
        if (__UINT32_MAX__ - wResidule <= PERFC.US.Overflow.wResidule) {
            int64_t lUSCompen = perfc_convert_ticks_to_us(wResidule);
            PERFC.US.wResidule = (int64_t)wResidule - perfc_convert_us_to_ticks(lUSCompen);
            
            PERFC.US.lTimestampBase += lUSCompen;
        } else {
            PERFC.US.wResidule = wResidule;
        }
    }
#endif
}

/*!
 * \brief Report the frequency of the system timer.
 * \return the value provided by perfc_port_get_system_timer_freq()
 */
uint32_t perfc_get_systimer_frequency(void)
{
    const uint32_t wFrequency = perfc_port_get_system_timer_freq();

    return wFrequency;
}

/* Weak hook called at the end of perfc_init(). The default implementation does
 * nothing; a strong definition elsewhere replaces it.
 */
__WEAK
__attribute__((noinline))
void __perf_os_patch_init(void)
{
}


/*!
 * \brief Recompute every frequency-dependent conversion unit and re-measure
 *        the measurement overhead g_nOffset.
 *
 * \note perfc_init() calls this function once the timer has been initialised.
 */
void update_perf_counter(void)
{
    /* ticks in one full timer period; only used in dedicated ms/us mode */
    int64_t lLoad = perfc_port_get_system_timer_top() + 1;
    UNUSED_PARAM(lLoad);
    
    
    uint32_t wSystemFrequency = perfc_port_get_system_timer_freq();

    PERFC.bLessThan1MHz = (wSystemFrequency < 1000000ul);

    if (PERFC.bLessThan1MHz) {
        /* below 1MHz: store microseconds-per-tick in Q16 */
        PERFC.US.q16Unit = reinterpret_q16_f32( 1000000.0f 
                                              / (float)wSystemFrequency);
    } else {
        /* otherwise: store ticks-per-microsecond (integer or Q16) */
    #if defined(__PERFC_NO_Q16__)
        PERFC.US.wUnit = wSystemFrequency / 1000000ul;
    #else
        PERFC.US.q16Unit = reinterpret_q16_f32( (float)wSystemFrequency 
                                              / 1000000.0f);
    #endif
    }
    
    /* ticks per millisecond; this is 0 when the frequency is below 1000 */
    PERFC.MS.wUnit = wSystemFrequency / 1000ul;

#if defined(__PERFC_USE_DEDICATED_MS_AND_US__)
    /* whole us/ms contained in one timer period, plus the leftover ticks */
    PERFC.US.Overflow.lCompensation = perfc_convert_ticks_to_us(lLoad);
    PERFC.US.Overflow.wResidule = lLoad 
                                - perfc_convert_us_to_ticks(
                                            PERFC.US.Overflow.lCompensation);

    PERFC.MS.Overflow.lCompensation = perfc_convert_ticks_to_ms(lLoad);
    PERFC.MS.Overflow.wResidule = lLoad 
                                - perfc_convert_ms_to_ticks(
                                            PERFC.MS.Overflow.lCompensation);
#endif

    /* measure a single NOP with the offset cleared and store the result in
     * g_nOffset (__cycleof__ and __cycle_count__ are defined outside this
     * file; presumably __cycle_count__ is the cycle count of the enclosed
     * block - TODO confirm)
     */
    __PERFC_SAFE {
        g_nOffset = 0;
        __cycleof__("", {g_nOffset = __cycle_count__;}) { __NOP(); }
    }
}

/*!
 * \brief Initialise the system timer port and reset all timestamp bases.
 * \param[in] bIsSysTimerOccupied stored in PERFC.bIsSysTimerOccupied and
 *            forwarded to perfc_port_init_system_timer(); check_systick()
 *            later branches on it
 * \return the result of perfc_port_init_system_timer()
 */
bool perfc_init(bool bIsSysTimerOccupied)
{
    bool bResult = false;
    __PERFC_SAFE {
        PERFC.bIsSysTimerOccupied = bIsSysTimerOccupied;
        bResult = perfc_port_init_system_timer(bIsSysTimerOccupied);            // use the longest period
        perfc_port_clear_system_timer_ovf_pending();
    }
    
    /* refresh the conversion units and g_nOffset for the current frequency */
    update_perf_counter();

    PERFC.Ticks.lTimestampBase = 0;                                             // reset system cycle counter
    PERFC.Ticks.lOldTimestamp = 0;

#if defined(__PERFC_USE_DEDICATED_MS_AND_US__)
    PERFC.MS.lTimestampBase = 0;                                                // reset system millisecond counter
    PERFC.MS.lOldTimestamp = 0;
    PERFC.US.lTimestampBase = 0;                                                // reset system microsecond counter
    PERFC.US.lOldTimestamp = 0;
#endif

    /* weak hook; does nothing unless overridden */
    __perf_os_patch_init();
    
    return bResult;
}

/*!
 * \brief Handle a pending timer overflow in place when there is one.
 * \param[in] lTemp the elapsed tick count the caller has already sampled
 * \return lTemp unchanged when no overflow is pending; otherwise a freshly
 *         sampled elapsed count taken after the overflow has been accounted
 */
__STATIC_INLINE int64_t __check_and_handle_ovf(int64_t lTemp)
{
    __IRQ_SAFE {
        if (perfc_port_is_system_timer_ovf_pending()){

            /* do what the overflow exception would have done */
            perfc_port_clear_system_timer_ovf_pending();
            perfc_port_insert_to_system_timer_insert_ovf_handler();

            /* refresh the elapsed just in case the counter has just overflowed/underflowed
             * after we called the perfc_port_get_system_timer_elapsed()
             */
            lTemp = perfc_port_get_system_timer_elapsed();
        }
    }
    
    return lTemp;
}

/*! \brief Sample the ticks elapsed in the current timer period, taking a
 *         pending (not yet handled) overflow into account.
 *  \return elapsed ticks to be added to PERFC.Ticks.lTimestampBase
 *  \note this function should only be called when irq is disabled
 *        hence SysTick-LOAD and (SCB->ICSR & SCB_ICSR_PENDSTSET_Msk)
 *        won't change.
 */
__STATIC_INLINE int64_t check_systick(void)
{
    int64_t lTemp = perfc_port_get_system_timer_elapsed();

    /*        Since we cannot stop counting temporarily, there are several
     *        conditions which we should take into consideration:
     *        - Condition 1: when assigning lTemp with the register value (LOAD-VAL),
     *            the underflow didn't happen but when we check the PENDSTSET bit,
     *            the underflow happens, for this condition, we should recall the
     *            perfc_port_get_system_timer_elapsed().
     *        The following code implements an equivalent logic.
     */
    if (PERFC.bIsSysTimerOccupied) {

    #if defined(__PERFC_ALLOWS_RUNNING_WIHTOUT_SYSTIMER_ISR__)
        lTemp = __check_and_handle_ovf(lTemp);
    #else
        /* the overflow is left pending here; only the returned value is
         * corrected, by one full timer period
         */
        if (perfc_port_is_system_timer_ovf_pending()){
            lTemp = perfc_port_get_system_timer_elapsed();
            lTemp += perfc_port_get_system_timer_top() + 1;
        }
    #endif
    } else {
        /* handle the overflow right here and clear the pending flag */
        lTemp = __check_and_handle_ovf(lTemp);
    }

    return lTemp;
}

/*!
 * \brief Freeze the current timestamps into the bases before the system timer
 *        is reconfigured.
 *
 * The timer is stopped, a pending overflow is accounted for, the current
 * tick (and, in dedicated mode, us/ms) timestamps are stored as the new bases,
 * and finally the timer counter is cleared.
 */
void before_cycle_counter_reconfiguration(void)
{
    __PERFC_SAFE {
        perfc_port_stop_system_timer_counting();

        if (perfc_port_is_system_timer_ovf_pending()) {                         
            perfc_port_clear_system_timer_ovf_pending();                        /* clear pending bit */

            perfc_port_insert_to_system_timer_insert_ovf_handler();             /* manually handle exception */

        }
        PERFC.Ticks.lTimestampBase = get_system_ticks();                        /* get the final cycle counter value */

    #if defined(__PERFC_USE_DEDICATED_MS_AND_US__)
        PERFC.US.lTimestampBase = get_system_us();                              /* get the final microsecond counter value */
        PERFC.MS.lTimestampBase = get_system_ms();                              /* get the final millisecond counter value */
    #endif

        perfc_port_clear_system_timer_counter();
    }
}

/* Constructor: calls perfc_init(true). With a toolchain that honours
 * __attribute__((constructor)) this runs before main().
 */
__attribute__((constructor))
void __perf_counter_init(void)
{
    perfc_init(true);
}

/*!
 * \brief Weak hook called on every polling iteration of perfc_delay_us().
 * \param[in] lRemainInUs the remaining time in microseconds
 * \retval true  keep waiting (default implementation)
 * \retval false abort the delay immediately
 */
__WEAK
__attribute__((noinline))
bool perfc_delay_us_user_code_in_loop(int64_t lRemainInUs)
{
    UNUSED_PARAM(lRemainInUs);

    return true;
}

/*!
 * \brief Busy-wait for approximately the given number of microseconds.
 *
 * The requested time is converted into timer ticks and reduced by a
 * compensation (the larger of g_nOffset and PERF_CNT_DELAY_US_COMPENSATION).
 * If nothing is left after compensation the function returns immediately.
 * perfc_delay_us_user_code_in_loop() is called while waiting and may abort
 * the delay by returning false.
 *
 * \param[in] wUs the delay in microseconds
 */
void perfc_delay_us(uint32_t wUs)
{
    /* This holds the converted delay first and the absolute deadline later.
     * Both are 64-bit tick counts, so a narrower type would truncate them.
     */
    int64_t lTicks = perfc_convert_us_to_ticks(wUs);
    int32_t iCompensate = g_nOffset > PERF_CNT_DELAY_US_COMPENSATION
                        ? g_nOffset 
                        : PERF_CNT_DELAY_US_COMPENSATION;

    if (lTicks <= iCompensate) {
        return ;
    }

    lTicks -= iCompensate;

    /* turn the relative delay into an absolute deadline */
    lTicks += get_system_ticks();
    do {
        int64_t lTimestamp = get_system_ticks();
        if (lTimestamp >= lTicks) {
            break;
        }
        if (!perfc_delay_us_user_code_in_loop( 
                perfc_convert_ticks_to_us(lTicks - lTimestamp) )) {
            break;
        }
    } while(1);
}

/*!
 * \brief Weak hook called on every polling iteration of the millisecond delay.
 * \param[in] lRemainInMs the remaining time in milliseconds
 * \retval true  keep waiting (default implementation)
 * \retval false abort the delay immediately
 */
__WEAK
__attribute__((noinline))
bool perfc_delay_ms_user_code_in_loop(int64_t lRemainInMs)
{
    UNUSED_PARAM(lRemainInMs);

    return true;
}

/*!
 * \brief Busy-wait for approximately the given number of milliseconds.
 *
 * The delay is converted to ticks and shortened by the larger of g_nOffset and
 * PERF_CNT_DELAY_US_COMPENSATION. The weak hook
 * perfc_delay_ms_user_code_in_loop() runs on every polling iteration and
 * aborts the wait by returning false. In the coroutine build the function
 * yields once per iteration.
 *
 * \param[in] wMs the delay in milliseconds
 */
#if __C_LANGUAGE_EXTENSIONS_PERFC_COROUTINE__
void __perfc_delay_ms(uint32_t wMs, perfc_coroutine_t *ptCoroutine)
#else
void perfc_delay_ms(uint32_t wMs)
#endif
{
    int64_t lDeadline = perfc_convert_ms_to_ticks(wMs);

    /* never compensate by less than the configured minimum */
    int32_t iOverhead = PERF_CNT_DELAY_US_COMPENSATION;
    if (g_nOffset > PERF_CNT_DELAY_US_COMPENSATION) {
        iOverhead = g_nOffset;
    }

    if (lDeadline <= iOverhead) {
        /* nothing left to wait for after compensation */
        return ;
    }
    lDeadline -= iOverhead;

    /* from here on lDeadline is an absolute tick count */
    lDeadline += get_system_ticks();

    for (;;) {
        int64_t lNow = get_system_ticks();

        if (lNow >= lDeadline) {
            break;
        }

        int64_t lRemainInMs = perfc_convert_ticks_to_ms(lDeadline - lNow);
        if (!perfc_delay_ms_user_code_in_loop(lRemainInMs)) {
            break;
        }
#if __C_LANGUAGE_EXTENSIONS_PERFC_COROUTINE__
        perfc_coroutine_yield(ptCoroutine);
#endif
    }
}

/*!
 * \brief Get the number of timer ticks elapsed since the counter was reset.
 * \return a tick count that never goes backwards between calls
 */
__attribute__((noinline))
int64_t get_system_ticks(void)
{
    int64_t lNow = 0;

    __PERFC_SAFE {
        lNow = check_systick() + PERFC.Ticks.lTimestampBase;

        /* A caller running in an exception that outranks the system timer
         * handler can briefly compute a value below the one reported last
         * time. Report the previous value in that case so time never rolls
         * back; the effect is transient and does not accumulate.
         */
        if (lNow >= PERFC.Ticks.lOldTimestamp) {
            PERFC.Ticks.lOldTimestamp = lNow;
        } else {
            lNow = PERFC.Ticks.lOldTimestamp;
        }
    }

    return lNow;
}

/*! \brief Return the current tick timestamp (same value as get_system_ticks()).
 *!
 *! \note the prototype of this clock() is different from the one defined in
 *!           time.h. As clock_t is usually defined as unsigned int, it is
 *!           not big enough in a Cortex-M system to hold a time-stamp. clock()
 *!           defined here returns the timestamp since the beginning of main()
 *!           and its unit is clock cycle (rather than 1ms). Hence, for a system
 *!           running at several hundred MHz or even 1GHz, e.g. RT10xx from
 *!           NXP, it is very easy to see a counter overflow as clock_t is
 *!           defined as uint32_t in time.h.
 *!           Since we are not allowed to change the definition of clock_t in
 *!           the official header file, i.e. time.h, I use a compatible prototype
 *!           after I checked the AAPCS spec. So, the return of the clock() is
 *!           int64_t, which will use R0 to store the lower 32bits and R1
 *!           to store the higher 32bits. When you are using the prototype from
 *!           time.h, the caller will only take the lower 32bits stored in R0 and
 *!           the higher 32bits stored in R1 will be ignored.
 *!
 *!           If you want to use the non-overflow version of this clock(), please
 *!           1) define the MACRO: __PERF_CNT_USE_LONG_CLOCK__ in your project
 *!           and 2) do not include system header file <time.h>
 *!
 */
#if !defined(__IS_COMPILER_IAR__)
__attribute__((nothrow))
#endif
__attribute__((noinline))
int64_t clock(void)
{
    return get_system_ticks();
}

/*!
 * \brief Convert timer ticks into whole milliseconds (truncating).
 * \param[in] lTick the tick count
 * \return lTick divided by the ticks-per-millisecond unit
 */
int64_t perfc_convert_ticks_to_ms(int64_t lTick)
{
    const int64_t lTicksPerMs = (int64_t)PERFC.MS.wUnit;

    return lTick / lTicksPerMs;
}

/*!
 * \brief Convert milliseconds into timer ticks.
 * \param[in] wMS the time in milliseconds
 * \return wMS multiplied by the ticks-per-millisecond unit (64-bit product)
 */
int64_t perfc_convert_ms_to_ticks(uint32_t wMS)
{
    const int64_t lTicksPerMs = (int64_t)PERFC.MS.wUnit;

    return lTicksPerMs * (int64_t)wMS;
}


/*!
 * \brief Convert timer ticks into whole microseconds (truncating).
 *
 * Below 1MHz the unit is microseconds-per-tick in Q16, so the ticks are
 * multiplied by it. Otherwise the unit is ticks-per-microsecond (a plain
 * integer when __PERFC_NO_Q16__ is defined, Q16 if not), so the ticks are
 * divided by it.
 *
 * \param[in] lTick the tick count
 * \return the equivalent time in microseconds
 */
int64_t perfc_convert_ticks_to_us(int64_t lTick)
{
    int64_t lResult;
    
    if (PERFC.bLessThan1MHz) {
        lResult = Q16_TO_INT(lTick * PERFC.US.q16Unit);
    } else {
    #if defined(__PERFC_NO_Q16__)
        lResult = lTick / (int64_t)PERFC.US.wUnit;
    #else
        lResult = INT_TO_Q16(lTick) / PERFC.US.q16Unit;
    #endif
    }
    
    return lResult;
}

/*!
 * \brief Convert microseconds into timer ticks (truncating).
 *
 * Below 1MHz the unit is microseconds-per-tick in Q16, so the time is divided
 * by it. Otherwise the unit is ticks-per-microsecond (a plain integer when
 * __PERFC_NO_Q16__ is defined, Q16 if not), so the time is multiplied by it.
 *
 * \param[in] wUS the time in microseconds
 * \return the equivalent number of timer ticks
 */
int64_t perfc_convert_us_to_ticks(uint32_t wUS)
{
    int64_t lResult;
    
    if (PERFC.bLessThan1MHz) {
        /* widen before shifting: a 32-bit (wUS << 16) wraps for
         * wUS >= 65536, i.e. for any delay of about 65ms or longer
         */
        lResult = INT_TO_Q16((int64_t)wUS) / PERFC.US.q16Unit;
        
    } else {
    #if defined(__PERFC_NO_Q16__)
        lResult = (int64_t)PERFC.US.wUnit * (int64_t)wUS;
    #else
        lResult = Q16_TO_INT((int64_t)wUS * (int64_t)PERFC.US.q16Unit);
    #endif
    }
    return lResult;
}

#if defined(__PERFC_USE_DEDICATED_MS_AND_US__)
/*!
 * \brief Get the elapsed time in milliseconds (dedicated ms/us mode only).
 * \return a millisecond count that never goes backwards between calls
 */
int64_t get_system_ms(void)
{
    int64_t lNowMs = 0;

    __PERFC_SAFE {

        /* completed periods + (current period ticks and pending residue) */
        lNowMs = PERFC.MS.lTimestampBase 
               + perfc_convert_ticks_to_ms(   check_systick() 
                                          +   (int64_t)PERFC.MS.wResidule);

        /* never report a value below the previous one */
        if (lNowMs >= PERFC.MS.lOldTimestamp) {
            PERFC.MS.lOldTimestamp = lNowMs;
        } else {
            lNowMs = PERFC.MS.lOldTimestamp;
        }
    }

    return lNowMs;
}

/*!
 * \brief Get the elapsed time in microseconds (dedicated ms/us mode only).
 * \return a microsecond count that never goes backwards between calls
 */
int64_t get_system_us(void)
{
    int64_t lNowUs = 0;

    __PERFC_SAFE {

        /* completed periods + (current period ticks and pending residue) */
        lNowUs = PERFC.US.lTimestampBase 
               + perfc_convert_ticks_to_us(  check_systick() 
                                          +  (int64_t)PERFC.US.wResidule);

        /* never report a value below the previous one */
        if (lNowUs >= PERFC.US.lOldTimestamp) {
            PERFC.US.lOldTimestamp = lNowUs;
        } else {
            lNowUs = PERFC.US.lOldTimestamp;
        }
    }

    return lNowUs;
}
#endif


/*!
 * \brief Non-blocking timeout check based on a caller-owned deadline.
 *
 * A deadline of 0 means "not armed": the first call arms it to
 * (now + lPeriod) and reports false.
 *
 * \param[in]     lPeriod     the period in timer ticks
 * \param[in,out] plTimestamp the deadline storage; NULL is rejected
 * \param[in]     bAutoReload re-arm the deadline to (now + lPeriod) on expiry
 * \retval true  the deadline has been reached
 * \retval false not yet expired, just armed, or plTimestamp is NULL
 */
bool __perfc_is_time_out(int64_t lPeriod, int64_t *plTimestamp, bool bAutoReload)
{
    if (NULL == plTimestamp) {
        return false;
    }

    const int64_t lNow = get_system_ticks();

    if (0 == *plTimestamp) {
        /* first use: arm the deadline */
        *plTimestamp = lPeriod + lNow;
        return false;
    }

    const bool bExpired = (lNow >= *plTimestamp);

    if (bExpired && bAutoReload) {
        *plTimestamp = lPeriod + lNow;
    }

    return bExpired;
}

#ifdef __PERFC_STACK_GROWS_UPWARD__

/*!
 * \brief Fill the unused part of an upward-growing stack with the watermark.
 *
 * The filled region runs from 8 bytes above the (8-byte aligned) stack
 * pointer up to and including the whole 64-bit slot at the adjusted limit, so
 * perfc_stack_remain(), which scans 64-bit slots downwards from that same
 * slot, sees a complete watermark in the first slot it reads.
 *
 * \param[in] nSP         the current stack pointer
 * \param[in] nStackLimit the address of the last available byte of the stack
 * \retval true  the region has been filled
 * \retval false there is no room between the stack pointer and the limit
 */
__attribute__((noinline))
bool perfc_stack_fill(uintptr_t nSP, uintptr_t nStackLimit)
{
    /* force 8bytes alignment */
    nSP = (nSP + 7) & (~((uintptr_t)0x07));
    nStackLimit &= (~((uintptr_t)0x07));
    
    /* we know the stack limit address is the last available byte address of the 
     * growing-upward stack, but just in case someone mistakenly use the
     * (base address + stack size) as the input, it is safe to ignore the 8 
     * bytes close to the (aligned) input limit address */
    nStackLimit -= 8;

    /* We don't know whether the location pointed by SP is used or not, it's 
     * safe to ignore it
     */
    nSP += 8;

    if (nSP > nStackLimit) {
        /* stack overflow */
        return false;
    }

    /* start from the HIGH 32-bit word of the top 64-bit slot; starting from
     * the low word would leave the upper half of that slot unfilled and the
     * first 64-bit comparison in perfc_stack_remain() would fail
     */
    uint32_t * pwStackPointer = (uint32_t *)(nStackLimit + sizeof(uint32_t));
    while((uintptr_t)pwStackPointer >= nSP) {
        *pwStackPointer-- = __PERFC_STACK_WATERMARK_U32__;
    }
    
    return true;
}

/*!
 * \brief Measure the untouched part of an upward-growing stack.
 *
 * Scans 64-bit slots downwards from the adjusted limit for as long as they
 * still hold the watermark. The scan has no other bound.
 *
 * \param[in] nStackLimit the address of the last available byte of the stack
 * \return the number of bytes that still hold the watermark
 */
__attribute__((noinline))
size_t perfc_stack_remain(uintptr_t nStackLimit)
{
    /* same limit adjustment as perfc_stack_fill(): align down, skip 8 bytes */
    uintptr_t nTopSlot = (nStackLimit & (~((uintptr_t)0x07))) - 8;

    const uint64_t *pdwProbe = (const uint64_t *)nTopSlot;
    size_t nIntactSlots = 0;

    for (; __PERFC_STACK_WATERMARK_U64__ == *pdwProbe; pdwProbe--) {
        nIntactSlots++;
    }

    return nIntactSlots * sizeof(uint64_t);
}

#else
/*!
 * \brief Fill the unused part of a downward-growing stack with the watermark.
 *
 * The filled region runs from the stack limit (aligned up to 8 bytes) up to,
 * but not including, the address 8 bytes below the aligned stack pointer.
 *
 * \param[in] nSP         the current stack pointer
 * \param[in] nStackLimit the lowest address of the stack
 * \retval true  the region has been filled
 * \retval false the adjusted stack pointer is already below the limit
 */
__attribute__((noinline))
bool perfc_stack_fill(uintptr_t nSP, uintptr_t nStackLimit)
{
    const uintptr_t nAlignMask = ~((uintptr_t)0x07);

    /* 8-byte align both ends: limit rounds up, stack pointer rounds down */
    nStackLimit = (nStackLimit + 7) & nAlignMask;
    nSP &= nAlignMask;

    /* the slot SP points at may be in use, so leave it alone */
    nSP -= 8;

    if (nSP < nStackLimit) {
        /* stack overflow */
        return false;
    }

    for (uint32_t *pwCursor = (uint32_t *)nStackLimit;
         (uintptr_t)pwCursor < nSP;
         pwCursor++) {
        *pwCursor = __PERFC_STACK_WATERMARK_U32__;
    }

    return true;
}

/*!
 * \brief Measure the untouched part of a downward-growing stack.
 *
 * Scans 64-bit slots upwards from the stack limit (aligned up to 8 bytes) for
 * as long as they still hold the watermark. The scan has no other bound.
 *
 * \param[in] nStackLimit the lowest address of the stack
 * \return the number of bytes that still hold the watermark
 */
__attribute__((noinline))
size_t perfc_stack_remain(uintptr_t nStackLimit)
{
    const uintptr_t nBottomSlot = (nStackLimit + 7) & (~((uintptr_t)0x07));

    const uint64_t *pdwProbe = (const uint64_t *)nBottomSlot;
    size_t nIntactSlots = 0;

    for (; __PERFC_STACK_WATERMARK_U64__ == *pdwProbe; pdwProbe++) {
        nIntactSlots++;
    }

    return nIntactSlots * sizeof(uint64_t);
}

#endif




/// Set up the timer hardware for the Event Recorder.
/// Nothing has to be done here, so success is always reported.
/// \return       status (1=Success, 0=Failure)
uint32_t EventRecorderTimerSetup (void)
{
    const uint32_t wStatusSuccess = 1;

    return wStatusSuccess;
}

/// Get the frequency of the timer used by the Event Recorder.
/// \return       timer frequency in Hz
uint32_t EventRecorderTimerGetFreq (void)
{
    const uint32_t wFrequency = perfc_port_get_system_timer_freq();

    return wFrequency;
}

/// Get the current timer count for the Event Recorder.
/// Only the lower 32 bits of the 64-bit tick timestamp are returned.
/// \return       timer count (32-bit)
uint32_t EventRecorderTimerGetCount (void)
{
    const int64_t lTicks = get_system_ticks();

    return (uint32_t)lTicks;
}

/* Weak default: returns NULL, which the task-cycle APIs in this file treat as
 * "no task record available". Callers in this file cast a non-NULL result to
 * struct __task_cycle_info_t *.
 */
__WEAK
task_cycle_info_t * get_rtos_task_cycle_info(void)
{
    return NULL;
}

/*!
 * \brief Reset the cycle record of the current task.
 *
 * Does nothing when get_rtos_task_cycle_info() returns NULL. Otherwise the
 * record is cleared, the root agent is pointed at its own cycle information,
 * the start timestamp is taken and the canary magic word is written.
 */
void init_task_cycle_counter(void)
{
    struct __task_cycle_info_t *ptRoot =
        (struct __task_cycle_info_t *)get_rtos_task_cycle_info();

    if (NULL != ptRoot) {
        memset(ptRoot, 0, sizeof(*ptRoot));

        ptRoot->tList.ptInfo = &(ptRoot->tInfo);
        ptRoot->tInfo.lStart = get_system_ticks();
        ptRoot->wMagicWord   = MAGIC_WORD_CANARY;
    }
}

bool perfc_check_task_stack_canary_safe(void)
{
    struct __task_cycle_info_t * ptRootAgent =
        (struct __task_cycle_info_t *)get_rtos_task_cycle_info();
    do {
        if (NULL == ptRootAgent) {
            break;
        }
    
        if  (   (MAGIC_WORD_CANARY == ptRootAgent->wMagicWord)
            ||  (MAGIC_WORD_AGENT_LIST_VALID == ptRootAgent->wMagicWord)) {
            return true;
        }
    } while(0);
    
    return false;
}

/*!
 * \brief Clear a cycle-info object and mark it as enabled.
 * \param[in,out] ptInfo the object to initialise; NULL is tolerated
 * \return ptInfo, unchanged
 */
task_cycle_info_t *init_task_cycle_info(task_cycle_info_t *ptInfo)
{
    if (NULL != ptInfo) {
        memset(ptInfo, 0, sizeof(*ptInfo));
        ptInfo->bEnabled = true;
    }

    return ptInfo;
}

/*!
 * \brief Enable cycle accounting for the given object.
 * \param[in,out] ptInfo the target object
 * \return the enabled state before the call; false when ptInfo is NULL
 */
bool enable_task_cycle_info(task_cycle_info_t *ptInfo)
{
    bool bPrevious = false;

    if (NULL == ptInfo) {
        return false;
    }

    /* read-modify-write as one protected step */
    __PERFC_SAFE {
        bPrevious = ptInfo->bEnabled;
        ptInfo->bEnabled = true;
    }

    return bPrevious;
}

/*!
 * \brief Disable cycle accounting for the given object.
 * \param[in,out] ptInfo the target object
 * \return the enabled state before the call; false when ptInfo is NULL
 */
bool disable_task_cycle_info(task_cycle_info_t *ptInfo)
{
    bool bPrevious = false;

    if (NULL == ptInfo) {
        return false;
    }

    /* read-modify-write as one protected step */
    __PERFC_SAFE {
        bPrevious = ptInfo->bEnabled;
        ptInfo->bEnabled = false;
    }

    return bPrevious;
}

/*!
 * \brief Restore an enabled state previously returned by
 *        enable_task_cycle_info() or disable_task_cycle_info().
 * \param[in,out] ptInfo         the target object; NULL is ignored
 * \param[in]     bEnabledStatus the state to restore
 */
void resume_task_cycle_info(task_cycle_info_t *ptInfo, bool bEnabledStatus)
{
    if (NULL != ptInfo) {
        ptInfo->bEnabled = bEnabledStatus;
    }
}


/*!
 * \brief Attach a cycle-info object to the current task through an agent.
 *
 * The agent is pushed to the front of the task's agent list and the task
 * record is marked with MAGIC_WORD_AGENT_LIST_VALID. Nothing happens when
 * either argument is NULL or when no task record is available.
 *
 * \param[in]     ptInfo  the cycle-info object to be updated on context switch
 * \param[in,out] ptAgent the list node that carries ptInfo
 * \return ptAgent, unchanged
 */
task_cycle_info_agent_t *register_task_cycle_agent(task_cycle_info_t *ptInfo,
                                             task_cycle_info_agent_t *ptAgent)
{
    __PERFC_SAFE {
        struct __task_cycle_info_t *ptRoot = NULL;

        /* only look the task record up when both arguments are usable */
        if ((NULL != ptAgent) && (NULL != ptInfo)) {
            ptRoot = (struct __task_cycle_info_t *)get_rtos_task_cycle_info();
        }

        if (NULL != ptRoot) {
            task_cycle_info_agent_t *ptHead = &(ptRoot->tList);

            ptRoot->wMagicWord = MAGIC_WORD_AGENT_LIST_VALID;
            ptAgent->ptInfo = ptInfo;

            /* insert right behind the list head (forward links) */
            ptAgent->ptNext = ptHead->ptNext;
            ptHead->ptNext = ptAgent;

            /* fix the backward links */
            ptAgent->ptPrev = ptHead;
            if (NULL != ptAgent->ptNext) {
                ptAgent->ptNext->ptPrev = ptAgent;
            }
        }
    }

    return ptAgent;
}

/*!
 * \brief Detach an agent from the agent list it is linked into.
 *
 * Nothing happens when ptAgent is NULL, when it has no predecessor, or when
 * its predecessor no longer points at it (i.e. it was removed already).
 *
 * \param[in,out] ptAgent the agent to remove
 * \return ptAgent, unchanged
 */
task_cycle_info_agent_t *
unregister_task_cycle_agent(task_cycle_info_agent_t *ptAgent)
{
    __PERFC_SAFE {
        task_cycle_info_agent_t *ptBefore =
            (NULL != ptAgent) ? ptAgent->ptPrev : NULL;

        /* the agent is linked only if its predecessor still points at it */
        if ((NULL != ptBefore) && (ptBefore->ptNext == ptAgent)) {

            /* unlink from the forward chain */
            ptBefore->ptNext = ptAgent->ptNext;

            /* unlink from the backward chain */
            if (NULL != ptAgent->ptNext) {
                ptAgent->ptNext->ptPrev = ptBefore;
            }

            ptAgent->ptNext = NULL;
            ptAgent->ptPrev = NULL;
        }
    }

    return ptAgent;
}


/*!
 * \brief Hook to call when a task is switched in.
 *
 * Records the switch-in timestamp and increments the activation count of the
 * task and, when the agent list is marked valid, of every enabled agent.
 *
 * \param[in] pwStack address of the task's struct __task_cycle_info_t record
 */
void __on_context_switch_in(uint32_t *pwStack)
{
    struct __task_cycle_info_t *ptRoot = (struct __task_cycle_info_t *)pwStack;

    ptRoot->lLastTimeStamp = get_system_ticks();
    ptRoot->tInfo.hwActiveCount++;

    if (MAGIC_WORD_AGENT_LIST_VALID != ptRoot->wMagicWord) {
        /* no agent has been registered for this task */
        return;
    }

    for (task_cycle_info_agent_t *ptNode = ptRoot->tList.ptNext;
         NULL != ptNode;
         ptNode = ptNode->ptNext) {

        if ((NULL != ptNode->ptInfo) && ptNode->ptInfo->bEnabled) {
            ptNode->ptInfo->hwActiveCount++;
        }
    }
}

/*!
 * \brief Hook to call when a task is switched out.
 *
 * The cycles spent since the last switch-in (minus g_nOffset) are added to
 * the task record and, when the agent list is marked valid, to every enabled
 * agent.
 *
 * \param[in] pwStack address of the task's struct __task_cycle_info_t record
 */
void __on_context_switch_out(uint32_t *pwStack)
{
    struct __task_cycle_info_t *ptRoot = (struct __task_cycle_info_t *)pwStack;
    int64_t lSpent = get_system_ticks() - ptRoot->lLastTimeStamp - g_nOffset;

    ptRoot->tInfo.nUsedRecent = lSpent;
    ptRoot->tInfo.lUsedTotal += lSpent;

    if (MAGIC_WORD_AGENT_LIST_VALID != ptRoot->wMagicWord) {
        /* no agent has been registered for this task */
        return;
    }

    for (task_cycle_info_agent_t *ptNode = ptRoot->tList.ptNext;
         NULL != ptNode;
         ptNode = ptNode->ptNext) {

        if ((NULL != ptNode->ptInfo) && ptNode->ptInfo->bEnabled) {
            ptNode->ptInfo->nUsedRecent = lSpent;
            ptNode->ptInfo->lUsedTotal += lSpent;
        }
    }
}

/*!
 * \brief Start counting the cycles used by the current task.
 *
 * Takes a fresh start timestamp and clears the task total. When ptInfo is
 * given, its total is cleared and it is enabled as well. Does nothing when no
 * task record is available.
 *
 * \param[in,out] ptInfo an optional cycle-info object; may be NULL
 */
__attribute__((noinline))
void __start_task_cycle_counter(task_cycle_info_t *ptInfo)
{
    struct __task_cycle_info_t *ptRoot =
        (struct __task_cycle_info_t *)get_rtos_task_cycle_info();

    if (NULL != ptRoot) {
        __PERFC_SAFE {
            ptRoot->lLastTimeStamp = get_system_ticks();
            ptRoot->tInfo.lUsedTotal = 0;

            if (NULL != ptInfo) {
                ptInfo->lUsedTotal = 0;
                ptInfo->bEnabled = true;
            }
        }
    }
}

/*!
 * \brief Stop counting and report the cycles used by the current task.
 *
 * The cycles since the last timestamp (minus g_nOffset) are added to the task
 * total. When ptInfo is given and enabled, they are added to it too and it is
 * disabled.
 *
 * \param[in,out] ptInfo an optional cycle-info object; may be NULL
 * \return the total of ptInfo when it is given, otherwise the task total;
 *         0 when no task record is available
 */
__attribute__((noinline))
int64_t __stop_task_cycle_counter(task_cycle_info_t *ptInfo)
{
    int64_t lTotal = 0;

    struct __task_cycle_info_t *ptRoot =
        (struct __task_cycle_info_t *)get_rtos_task_cycle_info();

    if (NULL == ptRoot) {
        return 0;
    }

    __PERFC_SAFE {
        int64_t lSpent = get_system_ticks() - ptRoot->lLastTimeStamp - g_nOffset;
        ptRoot->tInfo.lUsedTotal += lSpent;

        if (NULL == ptInfo) {
            lTotal = ptRoot->tInfo.lUsedTotal;
        } else {
            if (ptInfo->bEnabled) {
                ptInfo->nUsedRecent = lSpent;
                ptInfo->lUsedTotal += lSpent;
                ptInfo->bEnabled = false;
            }

            lTotal = ptInfo->lUsedTotal;
        }
    }

    return lTotal;
}

